#!/usr/bin/env python
# coding: utf-8
# liting

import time, datetime
import sys
import os.path
import re

import smtplib
from email.mime.text import MIMEText

# Date stamp used in the access-log file name (YYYY-MM-DD).
# The explicit directives are used instead of '%F', which is a platform
# extension and not available everywhere.
date = time.strftime('%Y-%m-%d')
# Name of the access log to scan.
log_file = 'localhost_access_log.%s.txt' % date
# State file that records when the previous run finished and at which offset.
seek_file = '/tmp/log_check_seek.tmp'

# Aggregated counters.  The top-level 'code' and 'time_range' entries hold the
# totals; per-URI entries with the same layout are added under the URI as key.
data = {'code': {}, 'time_range': {'3s': 0, '5s': 0, '10s': 0, '11s+': 0}}
# Mail settings.
# NOTE(review): the account credentials are hard-coded in the source.
mailto_list = ['kinda22@qq.com', '']
mail_host = "smtp.163.com"  # SMTP server
mail_user = "alert"  # user name
mail_pass = "aaatest"  # password
mail_postfix = "163.com"  # domain suffix of the sender address


def send_mail(to_list, sub, content):
    """Send a plain-text mail through the configured SMTP server.

    The server, account and sender domain come from the module-level
    settings ``mail_host``, ``mail_user``, ``mail_pass`` and ``mail_postfix``.

    Args:
        to_list: iterable of recipient addresses; blank entries are ignored.
        sub: subject line of the message.
        content: body text of the message (encoded as gb2312).

    Returns:
        True if the message was handed to the SMTP server; False if there is
        no usable recipient or if connecting, logging in or sending failed
        (the error text is printed).
    """
    # Drop empty placeholders so they end up neither in the header nor in the
    # RCPT list.
    recipients = [addr.strip() for addr in to_list if addr and addr.strip()]
    if not recipients:
        print('no valid mail recipients')
        return False

    me = "tomcat_log" + "<" + mail_user + "@" + mail_postfix + ">"
    msg = MIMEText(content, _subtype='plain', _charset='gb2312')
    msg['Subject'] = sub
    msg['From'] = me
    # Address lists in mail headers are comma separated.
    msg['To'] = ", ".join(recipients)

    server = smtplib.SMTP()
    try:
        server.connect(mail_host)
        server.login(mail_user, mail_pass)
        server.sendmail(me, recipients, msg.as_string())
        return True
    except Exception as e:
        print(str(e))
        return False
    finally:
        # Release the socket on failure as well as on success.
        server.close()

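# If the seek file exists, parse its timestamp and offset; otherwise the
# offset is 0 and the read pointer is moved to the end of the log file.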


# Work out where to start reading the log.
# `seek` is the offset and `seek_where` the `whence` value later passed to
# file.seek(): (offset, 0) resumes from a saved position, (0, 2) jumps to the
# end of the file.  The end of the file is the default.
seek = 0
seek_where = 2
if os.path.exists(seek_file):
    # Line 1 of the state file is the timestamp of the previous run,
    # line 2 the offset at which that run stopped.
    with open(seek_file) as f:
        seek_tmp = f.readlines()
    try:
        time_seek = datetime.datetime.strptime(seek_tmp[0].strip(), '%Y-%m-%d %H:%M:%S')
        saved_seek = int(seek_tmp[1].strip())
    except (IndexError, ValueError):
        # Empty, truncated or corrupt state file: fall back to the end.
        saved_seek = None

    if saved_seek is not None and saved_seek >= 0:
        time_local = datetime.datetime.now()
        time_delta = time_local - time_seek
        # The saved offset is trusted only if it was written in the past and
        # less than 300 seconds ago.  The timedelta objects are compared as a
        # whole: `.seconds` alone ignores the `days` component and would
        # accept a state file that is, say, one day and ten seconds old.
        is_fresh = datetime.timedelta(0) < time_delta < datetime.timedelta(seconds=300)
        # The log file name contains the date, so an offset saved on another
        # calendar day refers to a different file.
        same_day = time_seek.date() == time_local.date()
        if is_fresh and same_day:
            seek = saved_seek
            seek_where = 0
# Read the new part of the log, aggregate status codes and response times per
# URI, and mail an alert for every URI below the success thresholds.


def _new_stats():
    """Return an empty counter structure for one URI."""
    return {'code': {}, 'time_range': {'3s': 0, '5s': 0, '10s': 0, '11s+': 0}}


def _time_bucket(seconds):
    """Return the 'time_range' key for a request time, or None if negative.

    The value is presumably in seconds, as the bucket names suggest.
    Fractions are truncated, so 3.9 still counts as '3s'.
    """
    if seconds < 0:
        return None
    if seconds < 4:
        return '3s'
    if seconds < 6:
        return '5s'
    if seconds < 11:
        return '10s'
    # Open-ended: everything from 11 upwards, including values of 60 or more.
    return '11s+'


# Keys of `data` that hold the overall totals rather than one URI.
_TOTAL_KEYS = ('code', 'time_range')

pattren = re.compile(
    r'(?P<client_ip>\d+.\d+.\d+.\d+) (?P<remote_user>.*) (?P<remote_auth>.*) '
    r'(?P<time>\[.*\]) \"(?P<request>.*)\" (?P<status_code>.*) '
    r'(?P<sent_byte>.*) (?P<request_time>.*)')
re_uri = re.compile(r'(?P<request_method>\w+) (?P<request_uri>.*) (?P<http_version>.*)')

with open(log_file) as f:
    # Resume from the saved offset only if it lies inside the current file;
    # otherwise stay at the end of the file.
    f.seek(0, 2)
    if seek < f.tell():
        f.seek(seek, seek_where)

    for line in f:
        match = pattren.match(line)
        if match is None:
            # Not an access-log line in the expected format.
            continue
        request = re_uri.match(match.group('request'))
        if request is None:
            # Request line without "METHOD URI VERSION" (e.g. "-").
            continue
        try:
            status_code = int(match.group('status_code'))
            bucket = _time_bucket(float(match.group('request_time')))
        except ValueError:
            # Non-numeric status or request time: skip the whole line so the
            # code and time counters stay consistent with each other.
            continue

        # The statistics key is the URI without query string and without
        # anything from the first '%' on.
        request_uri = request.group('request_uri').split('?')[0]
        request_uri = request_uri.split('%')[0]
        if request_uri in _TOTAL_KEYS:
            # Would collide with the entries that hold the overall totals.
            continue
        if request_uri not in data:
            data[request_uri] = _new_stats()

        # Count the status code for this URI and overall.
        for codes in (data[request_uri]['code'], data['code']):
            codes[status_code] = codes.get(status_code, 0) + 1

        # Count the response-time bucket for this URI and overall.
        if bucket is not None:
            data[request_uri]['time_range'][bucket] += 1
            data['time_range'][bucket] += 1

    # Offset up to which the log has been consumed; saved for the next run.
    seek = f.tell()

content = []
alert = []
for k in sorted(data):
    if k in _TOTAL_KEYS:
        continue
    codes = data[k]['code']
    times = data[k]['time_range']
    total_sum = sum(codes.values())
    if total_sum == 0:
        continue

    # One report line: "<uri>  <code>:<count> ...  <bucket>:<count> ...".
    parts = [k]
    parts.extend("%s:%s" % (c, codes[c]) for c in sorted(codes))
    parts.extend("%s:%s" % (r, times[r]) for r in sorted(times))
    tt = "  ".join(parts)

    codes.setdefault(200, 0)
    # Percentages need float arithmetic; integer division would only ever
    # yield 0 or 100.
    success_code = 100.0 * codes[200] / total_sum
    success_time = 100.0 * times['3s'] / total_sum
    # Alert once per URI when fewer than 90% of the requests fall into the
    # '3s' bucket or fewer than 99% returned status 200.
    if success_time < 90 or success_code < 99:
        alert.append(tt)
    content.append(tt)

msg = '\r\n'.join(alert)
print(msg)
if alert:
    if send_mail(mailto_list, "TV_tomcat_Alert-Report", msg):
        print('send ok')
    else:
        print('send err')
# Persist the state for the next run: line 1 is the current local time,
# line 2 the offset reached in the log.  The timestamp is written with the
# same explicit format the reader parses; '%F %X' depends on platform support
# for '%F' and on the locale for '%X'.
with open(seek_file, 'w') as f:
    f.write(time.strftime('%Y-%m-%d %H:%M:%S') + "\n")
    f.write(str(seek) + "\n")
